Calculate time effect for timecourse of abdo and glut samples

Adding day15 floating and bulk samples to get DE stats on the combined timecourse effect (with floating and bulk separate??).
NB: I added robust=TRUE to the eBayes calls on 27/3/20, but the files haven’t been regenerated. So robust=FALSE there.

Load D13 timecourse

(with fractionally assigned multimapping alignments). Then format the table.

ous_dir = "C:/Users/sarahhp/OneDrive - Universitetet i Oslo/OUS"
d13_file = file.path(ous_dir, "D13rnaseqDec19/02featureCounts/d13_rnaseq.multim.counts")

d13_tab = read.delim(d13_file, skip=1, header=T, stringsAsFactors = FALSE)
colnames(d13_tab) =  gsub("output.01hisat.","", gsub(".sorted.bam","",colnames(d13_tab)))
#head(d13_tab) #table is messy so I won't print it here

Load file with sample info and match sample information to columns in FC table

info_file = file.path(ous_dir,"D13rnaseqDec19/sample_info.csv")
d13_info = read.delim(info_file, header=TRUE, stringsAsFactors = FALSE, sep = ",")
#head(d13_info)

d13_info$sample_id = gsub("-",".", d13_info$Sequencing_name)

#remove unnecessary columns from d13_info
names(d13_info)[names(d13_info)=='Additional..fill.in.2.3.digits.code..max.5..construct.etc..'] = 'donor'
names(d13_info)[names(d13_info)=='Replicate..fill.in.number.'] = 'biorep'
names(d13_info)[names(d13_info)=='timept'] = 'time'
d13_info = d13_info[c('sample_id','time','donor','biorep','Tube.Label')]

#Checked that FC table and d13_info are in the same order
#colnames(d13_tab)
#d13_info$sample_id

Load the Donor 7 timecourse:

And generate sample information from the bam file names

d7_file = file.path(ous_dir,"SeptRNAseq/02featureCounts/mar19_rnaseq.multim.counts")
d7_tab = read.delim(d7_file, skip=1, header=T, stringsAsFactors = FALSE)
colnames(d7_tab) =  gsub("output.01hisat.","", gsub(".sorted.bam","",colnames(d7_tab)))
head(d7_tab)
##            Geneid                   Chr
## 1 ENSG00000223972     1;1;1;1;1;1;1;1;1
## 2 ENSG00000227232 1;1;1;1;1;1;1;1;1;1;1
## 3 ENSG00000278267                     1
## 4 ENSG00000243485             1;1;1;1;1
## 5 ENSG00000284332                     1
## 6 ENSG00000237613             1;1;1;1;1
##                                                               Start
## 1             11869;12010;12179;12613;12613;12975;13221;13221;13453
## 2 14404;15005;15796;16607;16858;17233;17606;17915;18268;24738;29534
## 3                                                             17369
## 4                                     29554;30267;30564;30976;30976
## 5                                                             30366
## 6                                     34554;35245;35277;35721;35721
##                                                                 End
## 1             12227;12057;12227;12721;12697;13052;13374;14409;13670
## 2 14501;15038;15947;16765;17055;17368;17742;18061;18366;24891;29570
## 3                                                             17436
## 4                                     30039;30667;30667;31109;31097
## 5                                                             30503
## 6                                     35174;35481;35481;36073;36081
##                  Strand Length 1.KD4NT_S1 10.P6D3_S10 11.P6D9_S11 12.KD3NT_S12
## 1     +;+;+;+;+;+;+;+;+   1735       1.51        3.06        8.41         4.82
## 2 -;-;-;-;-;-;-;-;-;-;-   1351     187.13      223.71      171.04       235.24
## 3                     -     68       0.20        4.70        2.03         6.18
## 4             +;+;+;+;+   1021       0.20        1.14        0.00         1.03
## 5                     +    138       0.00        0.00        0.00         0.00
## 6             -;-;-;-;-   1219      37.19       42.05       25.39        25.61
##   13.P8D0_S13 14.P8D1_S14 15.P8D3_S15 16.P8D9_S16 2.KD5NT_S2 3.P5D0_S3
## 1        3.00        3.34        2.58        3.08       2.87      2.41
## 2      287.59      243.51      187.14      138.19      96.13    189.13
## 3        2.98        2.95        1.86        2.86       0.45      0.17
## 4        1.48        0.70        0.00        0.00       0.00      0.43
## 5        0.00        0.00        0.00        0.00       0.00      0.00
## 6       69.98       55.23       33.27       19.05      26.12     25.11
##   4.P5D1_S4 5.P5D3_S5 6.P5D9_S6 7.KD6NT_S7 8.P6D0_S8 9.P6D1_S9
## 1      1.58      1.18      1.65       2.75      0.67      2.50
## 2    159.41    218.31    135.33     106.75     93.00    143.30
## 3      1.68      1.07      0.48       0.67      0.00      1.37
## 4      0.25      0.52      0.00       0.00      1.22      0.54
## 5      0.00      0.00      0.00       0.00      0.00      0.00
## 6     32.61     33.20     24.30      16.86     30.08     27.54
#Remove the file system artefacts from the count column names to create a list of the libraries:
sample_id = grep("[[:digit:]]",colnames(d7_tab), value = T)
sample_id
##  [1] "1.KD4NT_S1"   "10.P6D3_S10"  "11.P6D9_S11"  "12.KD3NT_S12" "13.P8D0_S13" 
##  [6] "14.P8D1_S14"  "15.P8D3_S15"  "16.P8D9_S16"  "2.KD5NT_S2"   "3.P5D0_S3"   
## [11] "4.P5D1_S4"    "5.P5D3_S5"    "6.P5D9_S6"    "7.KD6NT_S7"   "8.P6D0_S8"   
## [16] "9.P6D1_S9"
#Next create a sample table with information about samples and time points taken from the filenames.
Tube.Label = gsub("\\..*_S[[:digit:]]+", "", sample_id)
time = gsub(".*(KD|P)[43586]D?", "", gsub("_S.*","",sample_id))
time = factor(gsub("NT","-2",time)) #match factors
biorep = gsub(".*\\.(KD|P)", "P", gsub("(D[0139]|NT)_S.*","",sample_id))
biorep =gsub("P3","P8",biorep) #fix biorep error
donor = rep("D07-G", 16)

d7_info = data.frame(sample_id,time,donor,biorep,Tube.Label)
d7_info
##       sample_id time donor biorep Tube.Label
## 1    1.KD4NT_S1   -2 D07-G     P4          1
## 2   10.P6D3_S10    3 D07-G     P6         10
## 3   11.P6D9_S11    9 D07-G     P6         11
## 4  12.KD3NT_S12   -2 D07-G     P8         12
## 5   13.P8D0_S13    0 D07-G     P8         13
## 6   14.P8D1_S14    1 D07-G     P8         14
## 7   15.P8D3_S15    3 D07-G     P8         15
## 8   16.P8D9_S16    9 D07-G     P8         16
## 9    2.KD5NT_S2   -2 D07-G     P5          2
## 10    3.P5D0_S3    0 D07-G     P5          3
## 11    4.P5D1_S4    1 D07-G     P5          4
## 12    5.P5D3_S5    3 D07-G     P5          5
## 13    6.P5D9_S6    9 D07-G     P5          6
## 14   7.KD6NT_S7   -2 D07-G     P6          7
## 15    8.P6D0_S8    0 D07-G     P6          8
## 16    9.P6D1_S9    1 D07-G     P6          9

Load day15 samples

FYI this file includes RNAseq from a different project (APEXseq); we’ll filter that out later

d15_file = file.path(ous_dir, "Feb22_RNAseq/02featureCounts/late_adipo_feb22.counts")
day15_tab = read.delim(d15_file, skip=1,header=T, stringsAsFactors = F)
colnames(day15_tab) = gsub("output.01hisat.","", gsub(".sorted.bam","",colnames(day15_tab)))
colnames(day15_tab)
##  [1] "Geneid"       "Chr"          "Start"        "End"          "Strand"      
##  [6] "Length"       "1.21405_S22"  "2.21406_S30"  "3.21407_S38"  "4.21408_S45" 
## [11] "5.21409_S52"  "6.21410_S1"   "7.21411_S8"   "8.21412_S15"  "9.21413_S23" 
## [16] "10.21414_S31" "11.21415_S39" "12.21416_S46" "13.21417_S53" "14.21418_S2" 
## [21] "15.21419_S9"  "16.21420_S16" "17.21421_S24" "18.21422_S32" "19.21423_S40"
## [26] "20.21424_S47" "21.21425_S54" "22.21426_S3"  "23.21427_S10" "24.21428_S17"
day15_info = read.delim(file.path(ous_dir, "Feb22_RNAseq/late_adipo_sample_info.txt"),
                header=T)
day15_info$sample_id = gsub("-",".", day15_info$sample_id)
day15_info$time = factor(gsub("D","", day15_info$time))
day15_info$donor = gsub("D07G","D07-G", gsub("D13A","D13-A", day15_info$donor))
colnames(day15_info) = gsub("rep", "biorep", colnames(day15_info))
#use cell_type column as experiment delimiter
colnames(day15_info) = gsub("cell_type", "exp", colnames(day15_info))

day15_info = day15_info[!(colnames(day15_info) %in% c("bio.condition","researcher"))]

colnames(day15_info); day15_info$sample_id
## [1] "time"       "biorep"     "exp"        "construct"  "donor"     
## [6] "separation" "sample_id"
##  [1] "1.21405_S22"  "2.21406_S30"  "3.21407_S38"  "4.21408_S45"  "5.21409_S52" 
##  [6] "6.21410_S1"   "7.21411_S8"   "8.21412_S15"  "9.21413_S23"  "10.21414_S31"
## [11] "11.21415_S39" "12.21416_S46" "13.21417_S53" "14.21418_S2"  "15.21419_S9" 
## [16] "16.21420_S16" "17.21421_S24" "18.21422_S32" "19.21423_S40" "20.21424_S47"
## [21] "21.21425_S54" "22.21426_S3"  "23.21427_S10" "24.21428_S17"

Edits to day15 file to fit: - remove D from time; add dash to donor - change column names: rep to biorep; cell_type to exp - remove researcher, bio.condition

Merge read counts and sample info

Merging the fc tables directly

early = merge(d13_tab, d7_tab)
colnames(early); dim(early)
##  [1] "Geneid"           "Chr"              "Start"            "End"             
##  [5] "Strand"           "Length"           "6.19190_S20"      "7.19191_S1"      
##  [9] "8.19192_S3"       "9.19193_S6"       "10.19194_S9"      "11.19195_S12"    
## [13] "12.19196_S15"     "13.19197_S18"     "14.19198_S21"     "15.19199_S2"     
## [17] "1.19200_S5"       "2.19201_S8"       "3.19202_S11"      "4.19203_S14"     
## [21] "5.19204_S17"      "16.19205_S4"      "17.19206_S7"      "18.19207_S10"    
## [25] "19.19208_S13"     "20.S2.KD5NT_S16"  "21.S7.KD6NT_S19"  "22.S12.KD3NT_S22"
## [29] "1.KD4NT_S1"       "10.P6D3_S10"      "11.P6D9_S11"      "12.KD3NT_S12"    
## [33] "13.P8D0_S13"      "14.P8D1_S14"      "15.P8D3_S15"      "16.P8D9_S16"     
## [37] "2.KD5NT_S2"       "3.P5D0_S3"        "4.P5D1_S4"        "5.P5D3_S5"       
## [41] "6.P5D9_S6"        "7.KD6NT_S7"       "8.P6D0_S8"        "9.P6D1_S9"
## [1] 58735    44
#Add experiment separator
d13_info$exp = 2
d7_info$exp = 1
colnames(d13_info)
## [1] "sample_id"  "time"       "donor"      "biorep"     "Tube.Label"
## [6] "exp"
colnames(d7_info)
## [1] "sample_id"  "time"       "donor"      "biorep"     "Tube.Label"
## [6] "exp"
early_info = rbind(d13_info, d7_info)
early_info$construct = "native"
early_info$separation = "bulk"

early_info = early_info[colnames(early_info) != "Tube.Label"]

table(paste(early_info$time, early_info$donor, early_info$exp))
## 
## -2 D07-G 1 -2 D07-G 2 -2 D13-A 2  0 D07-G 1  0 D13-A 2  1 D07-G 1  1 D13-A 2 
##          4          3          3          3          3          3          3 
## 15 D07-G 2 15 D13-A 2  3 D07-G 1  3 D13-A 2  9 D07-G 1  9 D13-A 2 
##          2          2          3          3          3          3

Merge sample information for D7 and d13; remove tube.label column and add columns construct=native and separation=bulk. Also add exp column; since d7 and d13 have overlapping samples

Merging in day15 now

whole = merge(early, day15_tab)
dim(whole)
## [1] 58735    68
whole_info = rbind(early_info, day15_info)
str(whole_info); tail(whole_info)
## 'data.frame':    62 obs. of  7 variables:
##  $ sample_id : chr  "6.19190_S20" "7.19191_S1" "8.19192_S3" "9.19193_S6" ...
##  $ time      : chr  "-2" "0" "1" "3" ...
##  $ donor     : chr  "D13-A" "D13-A" "D13-A" "D13-A" ...
##  $ biorep    : chr  "7" "7" "7" "7" ...
##  $ exp       : chr  "2" "2" "2" "2" ...
##  $ construct : chr  "native" "native" "native" "native" ...
##  $ separation: chr  "bulk" "bulk" "bulk" "bulk" ...
##       sample_id time donor biorep    exp construct separation
## 57 19.21423_S40   15 D07-G     P7 native    native       bulk
## 58 20.21424_S47   15 D07-G   P8_2 native    native       bulk
## 59 21.21425_S54   15 D07-G   P8_1 native    native       bulk
## 60  22.21426_S3   15 D13-A     P7 native    native       bulk
## 61 23.21427_S10   15 D13-A     P6 native    native       bulk
## 62 24.21428_S17   15 D13-A     P7 native    native       bulk

Save combined read counts so we don’t need to do this again.

write.table(whole, file.path(ous_dir, "Feb22_RNAseq/03limma/late_adipo_and_D7&D13_native_rnaseq.counts"), sep="\t", row.names = F)
write.table(whole_info, file.path(ous_dir, "Feb22_RNAseq/03limma/late_adipo_and_D7&D13_native_rnaseq_info.tab"), sep="\t", row.names = F)

Make DGE object

remove additional samples.

whole_info$time.donor = paste(whole_info$time, whole_info$donor, sep=".")
whole_info$group = paste(whole_info$time.donor, whole_info$separation, sep=".")

whole_ob = DGEList(counts=data.matrix(whole[grep("[[:digit:]]", colnames(whole))]),  
                   genes= whole[c("Geneid", "Length")], samples = whole_info)
rownames(whole_ob$counts) = whole_ob$genes$Geneid
summary (whole_ob)
##         Length  Class      Mode   
## counts  3641570 -none-     numeric
## samples      11 data.frame list   
## genes         2 data.frame list
plotMDS(whole_ob, labels = whole_ob$samples$group, gene.selection = "common")

plotMDS(whole_ob, labels = whole_ob$samples$exp, gene.selection = "common")

#Remove uncultured mature adipocytes from tissue extract
series_ob = whole_ob[,!(whole_ob$samples$exp == 2 & whole_ob$samples$time == 15)]
table(paste(series_ob$samples$group, whole_ob$samples$exp))
## 
##          -2.D07-G.bulk 1          -2.D07-G.bulk 2          -2.D13-A.bulk 2 
##                        2                        5                        3 
##     -2.D13-A.bulk native           0.D07-G.bulk 1           0.D13-A.bulk 2 
##                        1                        3                        3 
##      0.D13-A.bulk native           1.D07-G.bulk 1           1.D13-A.bulk 2 
##                        1                        3                        3 
##      1.D13-A.bulk native          15.D07-G.bulk 1       15.D07-G.bulk APEX 
##                        1                        4                        2 
##     15.D07-G.bulk native   15.D07-G.floating APEX     15.D13-A.bulk native 
##                        3                        3                        3 
##   15.D13-A.floating APEX 15.D13-A.floating native       21.D07-G.bulk APEX 
##                        1                        2                        6 
##           3.D07-G.bulk 1           3.D07-G.bulk 2           3.D13-A.bulk 2 
##                        2                        1                        3 
##      3.D13-A.bulk native           9.D07-G.bulk 1           9.D07-G.bulk 2 
##                        1                        2                        1 
##           9.D13-A.bulk 2 
##                        3
#remove APEX samples
series_ob = series_ob[,series_ob$samples$construct == "native"]

#Check column names are the same
series_ob$samples
##                              group lib.size norm.factors        sample_id time
## 6.19190_S20          -2.D13-A.bulk 43343287            1      6.19190_S20   -2
## 7.19191_S1            0.D13-A.bulk 53954862            1       7.19191_S1    0
## 8.19192_S3            1.D13-A.bulk 44816024            1       8.19192_S3    1
## 9.19193_S6            3.D13-A.bulk 27650074            1       9.19193_S6    3
## 10.19194_S9           9.D13-A.bulk 33681648            1      10.19194_S9    9
## 11.19195_S12         -2.D13-A.bulk 44069380            1     11.19195_S12   -2
## 12.19196_S15          0.D13-A.bulk 33748947            1     12.19196_S15    0
## 13.19197_S18          1.D13-A.bulk 35268416            1     13.19197_S18    1
## 14.19198_S21          3.D13-A.bulk 38990845            1     14.19198_S21    3
## 15.19199_S2           9.D13-A.bulk 31648240            1      15.19199_S2    9
## 1.19200_S5           -2.D13-A.bulk 24715231            1       1.19200_S5   -2
## 2.19201_S8            0.D13-A.bulk 25887790            1       2.19201_S8    0
## 3.19202_S11           1.D13-A.bulk 67946021            1      3.19202_S11    1
## 4.19203_S14           3.D13-A.bulk 51335376            1      4.19203_S14    3
## 5.19204_S17           9.D13-A.bulk 41570892            1      5.19204_S17    9
## 20.S2.KD5NT_S16      -2.D07-G.bulk 28227567            1  20.S2.KD5NT_S16   -2
## 21.S7.KD6NT_S19      -2.D07-G.bulk 30639099            1  21.S7.KD6NT_S19   -2
## 22.S12.KD3NT_S22     -2.D07-G.bulk 39314312            1 22.S12.KD3NT_S22   -2
## 1.KD4NT_S1           -2.D07-G.bulk 40875335            1       1.KD4NT_S1   -2
## 10.P6D3_S10           3.D07-G.bulk 37487858            1      10.P6D3_S10    3
## 11.P6D9_S11           9.D07-G.bulk 45348216            1      11.P6D9_S11    9
## 12.KD3NT_S12         -2.D07-G.bulk 52250725            1     12.KD3NT_S12   -2
## 13.P8D0_S13           0.D07-G.bulk 50473193            1      13.P8D0_S13    0
## 14.P8D1_S14           1.D07-G.bulk 37355443            1      14.P8D1_S14    1
## 15.P8D3_S15           3.D07-G.bulk 36821008            1      15.P8D3_S15    3
## 16.P8D9_S16           9.D07-G.bulk 25192201            1      16.P8D9_S16    9
## 2.KD5NT_S2           -2.D07-G.bulk 39256967            1       2.KD5NT_S2   -2
## 3.P5D0_S3             0.D07-G.bulk 35885732            1        3.P5D0_S3    0
## 4.P5D1_S4             1.D07-G.bulk 44499333            1        4.P5D1_S4    1
## 5.P5D3_S5             3.D07-G.bulk 45252924            1        5.P5D3_S5    3
## 6.P5D9_S6             9.D07-G.bulk 38835735            1        6.P5D9_S6    9
## 7.KD6NT_S7           -2.D07-G.bulk 37829067            1       7.KD6NT_S7   -2
## 8.P6D0_S8             0.D07-G.bulk 38988386            1        8.P6D0_S8    0
## 9.P6D1_S9             1.D07-G.bulk 40833777            1        9.P6D1_S9    1
## 13.21417_S53     15.D07-G.floating 39520835            1     13.21417_S53   15
## 14.21418_S2      15.D07-G.floating 43198916            1      14.21418_S2   15
## 15.21419_S9      15.D07-G.floating 53192791            1      15.21419_S9   15
## 16.21420_S16     15.D13-A.floating 58757001            1     16.21420_S16   15
## 17.21421_S24     15.D13-A.floating 64220795            1     17.21421_S24   15
## 18.21422_S32     15.D13-A.floating 65300951            1     18.21422_S32   15
## 19.21423_S40         15.D07-G.bulk 43513597            1     19.21423_S40   15
## 20.21424_S47         15.D07-G.bulk 68529570            1     20.21424_S47   15
## 21.21425_S54         15.D07-G.bulk 67338531            1     21.21425_S54   15
## 22.21426_S3          15.D13-A.bulk 28524813            1      22.21426_S3   15
## 23.21427_S10         15.D13-A.bulk 51405957            1     23.21427_S10   15
## 24.21428_S17         15.D13-A.bulk 52000236            1     24.21428_S17   15
##                  donor biorep    exp construct separation time.donor
## 6.19190_S20      D13-A      7      2    native       bulk   -2.D13-A
## 7.19191_S1       D13-A      7      2    native       bulk    0.D13-A
## 8.19192_S3       D13-A      7      2    native       bulk    1.D13-A
## 9.19193_S6       D13-A      7      2    native       bulk    3.D13-A
## 10.19194_S9      D13-A      7      2    native       bulk    9.D13-A
## 11.19195_S12     D13-A      8      2    native       bulk   -2.D13-A
## 12.19196_S15     D13-A      8      2    native       bulk    0.D13-A
## 13.19197_S18     D13-A      8      2    native       bulk    1.D13-A
## 14.19198_S21     D13-A      8      2    native       bulk    3.D13-A
## 15.19199_S2      D13-A      8      2    native       bulk    9.D13-A
## 1.19200_S5       D13-A      5      2    native       bulk   -2.D13-A
## 2.19201_S8       D13-A      5      2    native       bulk    0.D13-A
## 3.19202_S11      D13-A      5      2    native       bulk    1.D13-A
## 4.19203_S14      D13-A      5      2    native       bulk    3.D13-A
## 5.19204_S17      D13-A      5      2    native       bulk    9.D13-A
## 20.S2.KD5NT_S16  D07-G      5      2    native       bulk   -2.D07-G
## 21.S7.KD6NT_S19  D07-G      6      2    native       bulk   -2.D07-G
## 22.S12.KD3NT_S22 D07-G      8      2    native       bulk   -2.D07-G
## 1.KD4NT_S1       D07-G     P4      1    native       bulk   -2.D07-G
## 10.P6D3_S10      D07-G     P6      1    native       bulk    3.D07-G
## 11.P6D9_S11      D07-G     P6      1    native       bulk    9.D07-G
## 12.KD3NT_S12     D07-G     P8      1    native       bulk   -2.D07-G
## 13.P8D0_S13      D07-G     P8      1    native       bulk    0.D07-G
## 14.P8D1_S14      D07-G     P8      1    native       bulk    1.D07-G
## 15.P8D3_S15      D07-G     P8      1    native       bulk    3.D07-G
## 16.P8D9_S16      D07-G     P8      1    native       bulk    9.D07-G
## 2.KD5NT_S2       D07-G     P5      1    native       bulk   -2.D07-G
## 3.P5D0_S3        D07-G     P5      1    native       bulk    0.D07-G
## 4.P5D1_S4        D07-G     P5      1    native       bulk    1.D07-G
## 5.P5D3_S5        D07-G     P5      1    native       bulk    3.D07-G
## 6.P5D9_S6        D07-G     P5      1    native       bulk    9.D07-G
## 7.KD6NT_S7       D07-G     P6      1    native       bulk   -2.D07-G
## 8.P6D0_S8        D07-G     P6      1    native       bulk    0.D07-G
## 9.P6D1_S9        D07-G     P6      1    native       bulk    1.D07-G
## 13.21417_S53     D07-G     P6 native    native   floating   15.D07-G
## 14.21418_S2      D07-G   P8_1 native    native   floating   15.D07-G
## 15.21419_S9      D07-G   P8_2 native    native   floating   15.D07-G
## 16.21420_S16     D13-A   P6_1 native    native   floating   15.D13-A
## 17.21421_S24     D13-A   P6_2 native    native   floating   15.D13-A
## 18.21422_S32     D13-A     P7 native    native   floating   15.D13-A
## 19.21423_S40     D07-G     P7 native    native       bulk   15.D07-G
## 20.21424_S47     D07-G   P8_2 native    native       bulk   15.D07-G
## 21.21425_S54     D07-G   P8_1 native    native       bulk   15.D07-G
## 22.21426_S3      D13-A     P7 native    native       bulk   15.D13-A
## 23.21427_S10     D13-A     P6 native    native       bulk   15.D13-A
## 24.21428_S17     D13-A     P7 native    native       bulk   15.D13-A
colnames(series_ob)
##  [1] "6.19190_S20"      "7.19191_S1"       "8.19192_S3"       "9.19193_S6"      
##  [5] "10.19194_S9"      "11.19195_S12"     "12.19196_S15"     "13.19197_S18"    
##  [9] "14.19198_S21"     "15.19199_S2"      "1.19200_S5"       "2.19201_S8"      
## [13] "3.19202_S11"      "4.19203_S14"      "5.19204_S17"      "20.S2.KD5NT_S16" 
## [17] "21.S7.KD6NT_S19"  "22.S12.KD3NT_S22" "1.KD4NT_S1"       "10.P6D3_S10"     
## [21] "11.P6D9_S11"      "12.KD3NT_S12"     "13.P8D0_S13"      "14.P8D1_S14"     
## [25] "15.P8D3_S15"      "16.P8D9_S16"      "2.KD5NT_S2"       "3.P5D0_S3"       
## [29] "4.P5D1_S4"        "5.P5D3_S5"        "6.P5D9_S6"        "7.KD6NT_S7"      
## [33] "8.P6D0_S8"        "9.P6D1_S9"        "13.21417_S53"     "14.21418_S2"     
## [37] "15.21419_S9"      "16.21420_S16"     "17.21421_S24"     "18.21422_S32"    
## [41] "19.21423_S40"     "20.21424_S47"     "21.21425_S54"     "22.21426_S3"     
## [45] "23.21427_S10"     "24.21428_S17"

Initial Plots

Everything looks good. 1. MDSplot separates timepoints well 2. the RLE plot shows that median counts between libraries are 0, meaning library size and distribution is similar between libraries. Therefore normalisation between experiments is not necessary.

plotMDS(series_ob, labels = series_ob$samples$time, gene.selection = "common")

plotRLE(series_ob$counts, outline=FALSE, col=series_ob$samples$group, main="Before filtering")

## Filtering

plotSmear(series_ob, main = "Before Filtering")

dim(series_ob)
## [1] 58735    46
filt_series = series_ob[filterByExpr(series_ob),, keep.lib.sizes=FALSE]
dim(filt_series) #21174 genes kept (about 1k more genes that the timecourse to day9)
## [1] 21174    46
#The filter is approxiamtely 10/ libsize in millions
10/(median(series_ob$samples$lib.size)/1000000) #0.245 minimum CPM 
## [1] 0.2447707
#sanity check that gene_names match up
filt_series$counts[filt_series$genes$Geneid == "ENSG00000228630",] #HOTAIR
##      6.19190_S20       7.19191_S1       8.19192_S3       9.19193_S6 
##            67.00            60.00            61.00            26.00 
##      10.19194_S9     11.19195_S12     12.19196_S15     13.19197_S18 
##            18.00            39.00            52.00            36.00 
##     14.19198_S21      15.19199_S2       1.19200_S5       2.19201_S8 
##            41.00            15.00            18.00            46.00 
##      3.19202_S11      4.19203_S14      5.19204_S17  20.S2.KD5NT_S16 
##           105.00            37.00            12.00           200.00 
##  21.S7.KD6NT_S19 22.S12.KD3NT_S22       1.KD4NT_S1      10.P6D3_S10 
##           251.90           313.00           276.00           325.00 
##      11.P6D9_S11     12.KD3NT_S12      13.P8D0_S13      14.P8D1_S14 
##            96.00           427.00           770.25           462.00 
##      15.P8D3_S15      16.P8D9_S16       2.KD5NT_S2        3.P5D0_S3 
##           237.00           106.00           196.00           375.00 
##        4.P5D1_S4        5.P5D3_S5        6.P5D9_S6       7.KD6NT_S7 
##           454.00           438.00           151.00           342.00 
##        8.P6D0_S8        9.P6D1_S9     13.21417_S53      14.21418_S2 
##           595.00           330.00            71.00            41.00 
##      15.21419_S9     16.21420_S16     17.21421_S24     18.21422_S32 
##            71.00            26.00            11.00            15.00 
##     19.21423_S40     20.21424_S47     21.21425_S54      22.21426_S3 
##           339.00           797.12           844.00            26.00 
##     23.21427_S10     24.21428_S17 
##            78.00            47.00
filt_series$counts[filt_series$genes$Geneid == "ENSG00000132170",] #PPARG
##      6.19190_S20       7.19191_S1       8.19192_S3       9.19193_S6 
##          1303.10          1667.00          3994.00          6675.10 
##      10.19194_S9     11.19195_S12     12.19196_S15     13.19197_S18 
##          7365.24          1319.10           557.12          3194.31 
##     14.19198_S21      15.19199_S2       1.19200_S5       2.19201_S8 
##          7950.67          9278.10           542.00           671.00 
##      3.19202_S11      4.19203_S14      5.19204_S17  20.S2.KD5NT_S16 
##          4561.33          8161.83         11953.80           392.20 
##  21.S7.KD6NT_S19 22.S12.KD3NT_S22       1.KD4NT_S1      10.P6D3_S10 
##          1183.37          1248.50           743.00          3307.83 
##      11.P6D9_S11     12.KD3NT_S12      13.P8D0_S13      14.P8D1_S14 
##          5122.53           976.17           645.00          1255.00 
##      15.P8D3_S15      16.P8D9_S16       2.KD5NT_S2        3.P5D0_S3 
##          3011.50          3613.41           537.00           262.00 
##        4.P5D1_S4        5.P5D3_S5        6.P5D9_S6       7.KD6NT_S7 
##           886.00          3387.60          6195.75           793.00 
##        8.P6D0_S8        9.P6D1_S9     13.21417_S53      14.21418_S2 
##           537.10          1261.10         20687.05         21560.08 
##      15.21419_S9     16.21420_S16     17.21421_S24     18.21422_S32 
##         19293.64         30703.87         32751.01         38751.89 
##     19.21423_S40     20.21424_S47     21.21425_S54      22.21426_S3 
##         10886.47         12895.56         13914.89          8218.48 
##     23.21427_S10     24.21428_S17 
##         16600.64         18475.29
plotSmear(filt_series, main = "After Filtering") 

Donor vs donor this looks like very nice filtering (above, at any one time point some genes will be missing).

#Plot donor v donor -> a few low exression genes still but predominantly good filtering
two_filt = filt_series
two_filt$samples$group = as.factor(filt_series$samples$donor)
plotSmear(two_filt, main = "After Filtering")

#library sizes are different, but relative log expression is
#now ~ normally distributed within libraries
plotRLE(filt_series$counts, outline=FALSE, 
        col=filt_series$samples$group, main="After filtering")

plotMDS(filt_series, labels = filt_series$samples$group, gene.selection = "common")

Normalise

filt_series = calcNormFactors(filt_series,method = "TMM")
filt_series$samples$norm.factors
##  [1] 1.2453947 1.2143844 1.1251937 1.1279176 0.9458378 1.3221153 1.0006515
##  [8] 1.1403659 0.9928395 1.0445354 1.3664816 1.1431881 1.2621641 1.0546869
## [15] 1.0419248 1.2097142 1.3116360 1.2920292 1.1244045 0.9951992 0.7803828
## [22] 1.1123288 0.9314547 1.0344064 0.8685081 0.8944916 1.1077081 0.8561931
## [29] 0.9462953 0.9419874 0.8237669 1.1048697 0.9583928 0.8541696 0.7900846
## [36] 0.6108555 0.6834306 0.8374520 0.7701180 0.8412588 0.9839876 1.0048101
## [43] 1.0123225 0.9974551 0.9286849 1.0430259
plotRLE(cpm(filt_series$counts), outline=FALSE, 
        col=filt_series$samples$group, main="After TMM normalisation (no libsize)")

Library size is approximately the same after CPM normalisation, but it still needs to be adjusted directly for library size:

plotRLE(cpm(filt_series, normalized.lib.sizes = TRUE), outline=FALSE, 
        col=filt_series$samples$group, main="After TMM normalisation (with libsize)")

Annotate Gene Lists

library(biomaRt)
mart <- biomaRt::useMart(biomart = "ensembl",
  dataset = "hsapiens_gene_ensembl",
   host = "https://jan2019.archive.ensembl.org")
annot = getBM(c("external_gene_name", "description", "ensembl_gene_id"), 
              filters = "ensembl_gene_id",
              values = filt_series$genes$Geneid,
              mart = mart)

head(annot, n=2); dim(annot)
##   external_gene_name                                       description
## 1             TSPAN6 tetraspanin 6 [Source:HGNC Symbol;Acc:HGNC:11858]
## 2               TNMD   tenomodulin [Source:HGNC Symbol;Acc:HGNC:17757]
##   ensembl_gene_id
## 1 ENSG00000000003
## 2 ENSG00000000005
## [1] 21174     3
#Tidying up the annot table
annot$description = gsub("\\[Source:.+\\]", "", annot$description)
colnames(annot)[1] = "gene_name"

#Add gene names to filt_series
filt_series$genes = merge(filt_series$genes, annot, by.x= "Geneid", 
                        by.y = "ensembl_gene_id", sort=FALSE)
#head(filt_series$genes)
#make sure length is numeric
filt_series$genes$Length = as.numeric(filt_series$genes$Length)

Limma Log Normalisation

norm_series = voom(filt_series, plot = TRUE)

head(norm_series$design, n=2) #groups are time.donor
##             (Intercept) group-2.D13-A.bulk group0.D07-G.bulk group0.D13-A.bulk
## 6.19190_S20           1                  1                 0                 0
## 7.19191_S1            1                  0                 0                 1
##             group1.D07-G.bulk group1.D13-A.bulk group15.D07-G.bulk
## 6.19190_S20                 0                 0                  0
## 7.19191_S1                  0                 0                  0
##             group15.D07-G.floating group15.D13-A.bulk group15.D13-A.floating
## 6.19190_S20                      0                  0                      0
## 7.19191_S1                       0                  0                      0
##             group3.D07-G.bulk group3.D13-A.bulk group9.D07-G.bulk
## 6.19190_S20                 0                 0                 0
## 7.19191_S1                  0                 0                 0
##             group9.D13-A.bulk
## 6.19190_S20                 0
## 7.19191_S1                  0
summary(norm_series$E[,1])#log normlisation min -6, max 14
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  -6.754  -1.468   1.972   1.594   4.635  14.871

Design

How to separate floating and bulk adipocytes? Perhaps I’ll need to run the analysis separately, but start together with the separation as a factor.

#1 check the default design
colnames(norm_series$design)
##  [1] "(Intercept)"            "group-2.D13-A.bulk"     "group0.D07-G.bulk"     
##  [4] "group0.D13-A.bulk"      "group1.D07-G.bulk"      "group1.D13-A.bulk"     
##  [7] "group15.D07-G.bulk"     "group15.D07-G.floating" "group15.D13-A.bulk"    
## [10] "group15.D13-A.floating" "group3.D07-G.bulk"      "group3.D13-A.bulk"     
## [13] "group9.D07-G.bulk"      "group9.D13-A.bulk"
head(filt_series$samples, n=2)
##                     group lib.size norm.factors   sample_id time donor biorep
## 6.19190_S20 -2.D13-A.bulk 43327502     1.245395 6.19190_S20   -2 D13-A      7
## 7.19191_S1   0.D13-A.bulk 53932903     1.214384  7.19191_S1    0 D13-A      7
##             exp construct separation time.donor
## 6.19190_S20   2    native       bulk   -2.D13-A
## 7.19191_S1    2    native       bulk    0.D13-A
pair_design = model.matrix(~0+filt_series$samples$group)
colnames(pair_design) = paste("day", gsub("-",".",levels(filt_series$samples$group)), sep="")
rownames(pair_design) = filt_series$samples$sample_id
head(pair_design, n=2)
##             day.2.D07.G.bulk day.2.D13.A.bulk day0.D07.G.bulk day0.D13.A.bulk
## 6.19190_S20                0                1               0               0
## 7.19191_S1                 0                0               0               1
##             day1.D07.G.bulk day1.D13.A.bulk day15.D07.G.bulk
## 6.19190_S20               0               0                0
## 7.19191_S1                0               0                0
##             day15.D07.G.floating day15.D13.A.bulk day15.D13.A.floating
## 6.19190_S20                    0                0                    0
## 7.19191_S1                     0                0                    0
##             day3.D07.G.bulk day3.D13.A.bulk day9.D07.G.bulk day9.D13.A.bulk
## 6.19190_S20               0               0               0               0
## 7.19191_S1                0               0               0               0

Making a large time based model

Using bulk day15 samples time_effect_donor7 + time_effect_donor13 was the pvalue used at 0.01 to select genes for dpgp

pair_fit = lmFit(norm_series, design=pair_design)

contrasts = makeContrasts(time_effect_donor7 = (day0.D07.G.bulk - day.2.D07.G.bulk ) + #D-2 -> D0 effect
                                                  (day1.D07.G.bulk - day0.D07.G.bulk) + #D0 -> D1 effect
                                                  (day3.D07.G.bulk - day1.D07.G.bulk) + #D1 -> D3 effect
                                                  (day9.D07.G.bulk - day3.D07.G.bulk) + #D3 -> D9 effect 
                                                  (day15.D07.G.bulk - day9.D07.G.bulk),
                          time_effect_donor13 = (day0.D13.A.bulk - day.2.D13.A.bulk ) + #D-2 -> D0 effect
                                                  (day1.D13.A.bulk - day0.D13.A.bulk) + #D0 -> D1 effect
                                                  (day3.D13.A.bulk - day1.D13.A.bulk) + #D1 -> D3 effect
                                                  (day9.D13.A.bulk - day3.D13.A.bulk) + #D3 -> D9 effect 
                                                  (day15.D13.A.bulk - day9.D13.A.bulk), #D9 -> D15 effect
                          time_effect_per_donor = ((day0.D07.G.bulk - day.2.D07.G.bulk ) + #donor 7
                                                  (day1.D07.G.bulk - day0.D07.G.bulk) + 
                                                  (day3.D07.G.bulk - day1.D07.G.bulk) + 
                                                  (day9.D07.G.bulk - day3.D07.G.bulk) + 
                                                  (day15.D07.G.bulk - day9.D07.G.bulk)) - #minus
                                                  ((day0.D13.A.bulk - day.2.D13.A.bulk ) + #donor 13
                                                  (day1.D13.A.bulk - day0.D13.A.bulk) + 
                                                  (day3.D13.A.bulk - day1.D13.A.bulk) + 
                                                  (day9.D13.A.bulk - day3.D13.A.bulk) + 
                                                  (day15.D13.A.bulk - day9.D13.A.bulk)),
                          levels=pair_design)
special_fit = contrasts.fit(pair_fit,contrasts)
special_fit = eBayes(special_fit, robust = TRUE)
summary(decideTests(special_fit, method="separate", adjust.method = "BH", p.value = 0.01)) #Adjusted for contrast
##        time_effect_donor7 time_effect_donor13 time_effect_per_donor
## Down                 4563                4558                    67
## NotSig              11929               12246                 20976
## Up                   4682                4370                   131
summary(decideTests(special_fit ,method="global", adjust.method = "BH")) #Unadjusted for contrast
##        time_effect_donor7 time_effect_donor13 time_effect_per_donor
## Down                 5336                5534                  1399
## NotSig              10166               10173                 18229
## Up                   5672                5467                  1546

Interesting, the time effect is less significant in donor13. The time effect is similar rather than different between donors -> most genes respons similarly to adipogenic cocktail, regardless of depot/donor.

Number of DE genes

Adding an extra timepoint adds more genes to the DE list; 11,639 genes at p < 0.01 (basically 50% of all detected genes).

comb = topTable(special_fit, number = nrow(pair_fit$genes), coef = c("time_effect_donor7","time_effect_donor13"))
dim(comb[comb$adj.P.Val < 0.05,]) #More than half of all genes change; most will be the same
## [1] 14236    10
dim(comb[comb$adj.P.Val < 0.01,]) 
## [1] 11639    10
volcanoplot(special_fit, coef = c(1,2), highlight =10, names = special_fit$genes$gene_name,
            main="Time effect (combined across donors)", xlab="log2FC ABDO <---> GLUT")

volcanoplot(special_fit, coef = c(1,2), highlight =11639, names = special_fit$genes$gene_name,
            main="Time effect (combined across donors)", xlab="log2FC ABDO <---> GLUT")

volcanoplot(special_fit, coef = c(1,2),
            main="Time effect (combined across donors)", xlab="log2FC ABDO <---> GLUT")

head(comb)
##                          Geneid Length gene_name
## ENSG00000151726 ENSG00000151726   6284     ACSL1
## ENSG00000099194 ENSG00000099194   5362       SCD
## ENSG00000091513 ENSG00000091513  26199        TF
## ENSG00000056998 ENSG00000056998   3655      GYG2
## ENSG00000042445 ENSG00000042445   4005    RETSAT
## ENSG00000131471 ENSG00000131471   4524      AOC3
##                                                     description
## ENSG00000151726 acyl-CoA synthetase long chain family member 1 
## ENSG00000099194                        stearoyl-CoA desaturase 
## ENSG00000091513                                    transferrin 
## ENSG00000056998                                   glycogenin 2 
## ENSG00000042445                               retinol saturase 
## ENSG00000131471             amine oxidase, copper containing 3 
##                 time_effect_donor7 time_effect_donor13   AveExpr         F
## ENSG00000151726           6.328527            6.587910  8.810891 1311.9493
## ENSG00000099194           8.374125            8.116002 11.807874  977.5091
## ENSG00000091513          15.353000           14.232266  3.105257  772.2546
## ENSG00000056998           5.435924            4.592658  4.980263  682.1097
## ENSG00000042445           3.744336            4.451633  7.100134  664.3586
## ENSG00000131471          11.794312           11.680727  6.379407  583.7300
##                      P.Value    adj.P.Val
## ENSG00000151726 4.270636e-36 9.042645e-32
## ENSG00000099194 1.366785e-33 1.447015e-29
## ENSG00000091513 1.130334e-31 7.977894e-28
## ENSG00000056998 9.522686e-31 5.040834e-27
## ENSG00000042445 1.558533e-30 6.600076e-27
## ENSG00000131471 2.068864e-29 6.793301e-26
write.table(comb, sep="\t", row.names=FALSE,
            file.path(ous_dir, "Feb22_RNAseq/03limma/time_effect_perdonor.tab"))

Still, at p < 0.05 about 8k genes hit sig. in both donors separately, and 3k each only hit sig. in one donor. For the HOTAIR project we didn’t want to select consensus DE genes, because its only DE in Glut. For the nucleolus project it might have made sense to, but to match what’s gone before I don’t want to make it too complicated.

donor7 = topTable(special_fit, number = nrow(pair_fit$genes), coef = c("time_effect_donor7"), adjust.method = "BH")
donor13 = topTable(special_fit, number = nrow(pair_fit$genes), coef = c("time_effect_donor13"))

donor7 = donor7[donor7$adj.P.Val < 0.05,]
donor13 = donor13[donor13$adj.P.Val < 0.05,]

dim(donor7);dim(donor13)
## [1] 11472    10
## [1] 11512    10
both_donors = merge(donor7, donor13, all=FALSE, by="Geneid"); dim(both_donors)
## [1] 8152   19
just13 = donor13[!(donor13$Geneid %in% donor7$Geneid),]
dim(just13)
## [1] 3360   10
just7 = donor7[!(donor7$Geneid %in% donor13$Geneid),]
dim(just7)
## [1] 3320   10

Other plots to look at the difference

library(ggplot2)
ggplot(comb, aes(y=AveExpr, x=-log10(P.Value))) + geom_point(alpha=0.1) +
  geom_point(data=comb[1:5000,], colour="red", alpha=0.1)

log FC tables

logFC time pairs for each donor separately (so 8 total columns) Also adding a bulk to floating test, since floating cells are theoretically the most mature subset of bulk cells.

pair_cont = makeContrasts(#Donor 7 changes over time
                          d.2_to_d0.D7 = day0.D07.G.bulk - day.2.D07.G.bulk,
                          d0_to_d1.D7 = day1.D07.G.bulk - day0.D07.G.bulk,
                          d1_to_d3.D7 = day3.D07.G.bulk - day1.D07.G.bulk, 
                          d3_to_d9.D7= day9.D07.G.bulk - day3.D07.G.bulk,
                          d9_to_d15.D7.bulk =  day15.D07.G.bulk - day9.D07.G.bulk,
                          d9_to_d15.D7.floating =  day15.D07.G.floating - day9.D07.G.bulk,
                          d15.bulk_to_floating.D7 = day15.D07.G.floating - day15.D07.G.bulk,
                          #Donor 13 changes over time
                          d.2_to_d0.D13 = day0.D13.A.bulk - day.2.D13.A.bulk,
                          d0_to_d1.D13 = day1.D13.A.bulk - day0.D13.A.bulk,
                          d1_to_d3.D13 = day3.D13.A.bulk - day1.D13.A.bulk,
                          d3_to_d9.D13 = day9.D13.A.bulk - day3.D13.A.bulk,
                          d9_to_d15.D13.bulk =  day15.D13.A.bulk - day9.D13.A.bulk,
                          d9_to_d15.D13.floating =  day15.D13.A.floating - day9.D13.A.bulk,
                          d15.bulk_to_floating.D13 = day15.D13.A.floating - day15.D13.A.bulk,
                          levels=pair_design)

pair_fit = contrasts.fit(pair_fit,pair_cont)
pair_fit = eBayes(pair_fit, robust = TRUE)
pair_tab = topTable(pair_fit, number = nrow(pair_fit$genes))

nrow(pair_tab[pair_tab$adj.P.Val < 0.01,])
## [1] 18297

Donor 13 has very similar bulk and floating; D7 bulk and floating are very different :P

result = decideTests(pair_fit, p.value = 0.01)

summary(result)
##        d.2_to_d0.D7 d0_to_d1.D7 d1_to_d3.D7 d3_to_d9.D7 d9_to_d15.D7.bulk
## Down           2489        1437         250         319              3353
## NotSig        15507       18900       20481       20466             14120
## Up             3178         837         443         389              3701
##        d9_to_d15.D7.floating d15.bulk_to_floating.D7 d.2_to_d0.D13 d0_to_d1.D13
## Down                    4357                    3097           909         1301
## NotSig                 12264                   14860         19492        19021
## Up                      4553                    3217           773          852
##        d1_to_d3.D13 d3_to_d9.D13 d9_to_d15.D13.bulk d9_to_d15.D13.floating
## Down            619          178               3067                   3181
## NotSig        19845        20687              15334                  14969
## Up              710          309               2773                   3024
##        d15.bulk_to_floating.D13
## Down                        131
## NotSig                    20871
## Up                          172
info = data.frame(t(summary(result)))
colnames(info) = c("test", "result", "freq")
info$donor = gsub("d.*_to_(d.*|floating)\\.", "", gsub(".(bulk|floating)$", "", info$test))
info$timezone = factor(gsub("\\.D(7|13)", "", info$test), 
                       levels = c("d.2_to_d0","d0_to_d1", "d1_to_d3" ,"d3_to_d9" ,"d9_to_d15.bulk", "d9_to_d15.floating","d15.bulk_to_floating"))
info$result = factor(info$result, levels=c("Up","Down", "NotSig"))
info
##                        test result  freq donor             timezone
## 1              d.2_to_d0.D7   Down  2489    D7            d.2_to_d0
## 2               d0_to_d1.D7   Down  1437    D7             d0_to_d1
## 3               d1_to_d3.D7   Down   250    D7             d1_to_d3
## 4               d3_to_d9.D7   Down   319    D7             d3_to_d9
## 5         d9_to_d15.D7.bulk   Down  3353    D7       d9_to_d15.bulk
## 6     d9_to_d15.D7.floating   Down  4357    D7   d9_to_d15.floating
## 7   d15.bulk_to_floating.D7   Down  3097    D7 d15.bulk_to_floating
## 8             d.2_to_d0.D13   Down   909   D13            d.2_to_d0
## 9              d0_to_d1.D13   Down  1301   D13             d0_to_d1
## 10             d1_to_d3.D13   Down   619   D13             d1_to_d3
## 11             d3_to_d9.D13   Down   178   D13             d3_to_d9
## 12       d9_to_d15.D13.bulk   Down  3067   D13       d9_to_d15.bulk
## 13   d9_to_d15.D13.floating   Down  3181   D13   d9_to_d15.floating
## 14 d15.bulk_to_floating.D13   Down   131   D13 d15.bulk_to_floating
## 15             d.2_to_d0.D7 NotSig 15507    D7            d.2_to_d0
## 16              d0_to_d1.D7 NotSig 18900    D7             d0_to_d1
## 17              d1_to_d3.D7 NotSig 20481    D7             d1_to_d3
## 18              d3_to_d9.D7 NotSig 20466    D7             d3_to_d9
## 19        d9_to_d15.D7.bulk NotSig 14120    D7       d9_to_d15.bulk
## 20    d9_to_d15.D7.floating NotSig 12264    D7   d9_to_d15.floating
## 21  d15.bulk_to_floating.D7 NotSig 14860    D7 d15.bulk_to_floating
## 22            d.2_to_d0.D13 NotSig 19492   D13            d.2_to_d0
## 23             d0_to_d1.D13 NotSig 19021   D13             d0_to_d1
## 24             d1_to_d3.D13 NotSig 19845   D13             d1_to_d3
## 25             d3_to_d9.D13 NotSig 20687   D13             d3_to_d9
## 26       d9_to_d15.D13.bulk NotSig 15334   D13       d9_to_d15.bulk
## 27   d9_to_d15.D13.floating NotSig 14969   D13   d9_to_d15.floating
## 28 d15.bulk_to_floating.D13 NotSig 20871   D13 d15.bulk_to_floating
## 29             d.2_to_d0.D7     Up  3178    D7            d.2_to_d0
## 30              d0_to_d1.D7     Up   837    D7             d0_to_d1
## 31              d1_to_d3.D7     Up   443    D7             d1_to_d3
## 32              d3_to_d9.D7     Up   389    D7             d3_to_d9
## 33        d9_to_d15.D7.bulk     Up  3701    D7       d9_to_d15.bulk
## 34    d9_to_d15.D7.floating     Up  4553    D7   d9_to_d15.floating
## 35  d15.bulk_to_floating.D7     Up  3217    D7 d15.bulk_to_floating
## 36            d.2_to_d0.D13     Up   773   D13            d.2_to_d0
## 37             d0_to_d1.D13     Up   852   D13             d0_to_d1
## 38             d1_to_d3.D13     Up   710   D13             d1_to_d3
## 39             d3_to_d9.D13     Up   309   D13             d3_to_d9
## 40       d9_to_d15.D13.bulk     Up  2773   D13       d9_to_d15.bulk
## 41   d9_to_d15.D13.floating     Up  3024   D13   d9_to_d15.floating
## 42 d15.bulk_to_floating.D13     Up   172   D13 d15.bulk_to_floating
ggplot(info) + geom_bar(stat="identity",aes(x=timezone,y=freq, fill=donor, group=donor), position="dodge") + 
  theme(axis.text.x = element_text(angle=45, hjust=1)) + facet_grid(result~., scales = "free_y")

ggplot(info[info$result != "NotSig",]) + geom_bar(stat="identity",aes(x=timezone,y=freq, fill=donor, group=donor), position="dodge") + 
  theme(axis.text.x = element_text(angle=45, hjust=1)) + facet_grid(result~.)

ggplot(info[info$result != "NotSig",]) + geom_bar(stat="summary", fun="sum",aes(x=timezone,y=freq, fill=donor, group=donor), position="dodge") + 
  theme(axis.text.x = element_text(angle=45, hjust=1)) + ylab("number sig genes")

Donor7 has more dynamic gene expression than donor13, particularly in cells becoming confluent and between bulk and floating adipocyte fractions. This means that when I do combined tests and analysis the effect is often driven by donor7 rather than donor13. It’s also notable that there is a huge difference in gene expression from day9 to day15, which will dominate over the more moderate effects seen in the early adipogenesis (from day0 to day9). It may make sense to streamline our timepoints or comparisons, to test more specific things. For example Pro -> D0 -> D9 -> D15

vennDiagram(result[,c("d.2_to_d0.D7", "d9_to_d15.D7.bulk", "d9_to_d15.D7.floating")], include="up")

vennDiagram(result[,c("d.2_to_d0.D7", "d9_to_d15.D7.bulk", "d9_to_d15.D7.floating")], include="down")

vennDiagram(result[,c("d9_to_d15.D7.bulk", "d9_to_d15.D7.floating", "d9_to_d15.D13.bulk","d9_to_d15.D13.floating")])

vennDiagram(result[,c( "d.2_to_d0.D7","d9_to_d15.D7.bulk", "d9_to_d15.D13.bulk")]) #day-2 to day0 changing genes often also change d9 to d15

#write.table(pair_tab,  sep="\t", row.names=FALSE, 
 #           file.path(limma_dir, "logFCs_timePairs.tab"))

Get an overview of these changes in a table form….

No genes are significant at every condition (time * donor). Few genes are significant at no condition (just 4650). Many genes are significant in 1 to 4 conditions, and fewer in 5 and more conditions.

dynamic = as.data.frame(result)
dynamic$sig_n = rowSums(result != 0)
dynamic$up_n = rowSums(result == 1)
dynamic$down_n = rowSums(result == -1)

head(dynamic)
##                 d.2_to_d0.D7 d0_to_d1.D7 d1_to_d3.D7 d3_to_d9.D7
## ENSG00000000003            0           0           0           0
## ENSG00000000005            0           0           0           0
## ENSG00000000419            0           0           0           0
## ENSG00000000457            0           0           0           0
## ENSG00000000460           -1           0           0           0
## ENSG00000000938            1           0           0           0
##                 d9_to_d15.D7.bulk d9_to_d15.D7.floating d15.bulk_to_floating.D7
## ENSG00000000003                 1                     1                       0
## ENSG00000000005                 0                     0                       0
## ENSG00000000419                 0                     0                       0
## ENSG00000000457                 0                     0                       0
## ENSG00000000460                 0                     0                       0
## ENSG00000000938                 0                     1                       1
##                 d.2_to_d0.D13 d0_to_d1.D13 d1_to_d3.D13 d3_to_d9.D13
## ENSG00000000003             0            0            0            0
## ENSG00000000005             0            0            0            1
## ENSG00000000419             0            0            0            0
## ENSG00000000457             0            0            0            0
## ENSG00000000460            -1            0            0            0
## ENSG00000000938             0            0            0            0
##                 d9_to_d15.D13.bulk d9_to_d15.D13.floating
## ENSG00000000003                  0                      0
## ENSG00000000005                  0                      0
## ENSG00000000419                  0                      0
## ENSG00000000457                  0                      0
## ENSG00000000460                  0                      0
## ENSG00000000938                  1                      1
##                 d15.bulk_to_floating.D13 sig_n up_n down_n
## ENSG00000000003                        0     2    2      0
## ENSG00000000005                        0     1    1      0
## ENSG00000000419                        0     0    0      0
## ENSG00000000457                        0     0    0      0
## ENSG00000000460                        0     2    0      2
## ENSG00000000938                        0     5    5      0
table(dynamic$sig_n)
## 
##    0    1    2    3    4    5    6    7    8    9   10   11   12   13 
## 4650 3939 3965 2624 3043 1430  746  393  218  104   39   18    4    1
table(dynamic$up_n)
## 
##     0     1     2     3     4     5     6     7     8     9    10    11    12 
## 10140  4466  2767  1685  1239   532   227    74    26    11     5     1     1
table(dynamic$down_n)
## 
##     0     1     2     3     4     5     6     7     9 
## 10294  4036  2912  1449  2044   346    78    14     1
plot(table(dynamic$sig_n))

ccount = function(x){return(-count(x))}

ggplot(dynamic) + geom_histogram(aes(x=up_n), fill="blue", binwidth = 1, colour="grey") +
  geom_histogram( aes(x=down_n, y=-(..count..)), fill="red", binwidth = 1, colour="grey") 

#write.table(result, file.path(ous_dir,"Feb22_RNAseq/03limma/decideResults_pairwise_summary.tab" ), sep="\t")

binary = result
binary[binary == -1] = 1
write.table(binary, file.path(ous_dir,"Feb22_RNAseq/03limma/decideResults_pairwise_binary.txt" ), sep="\t")
top = ggplot(dynamic) + geom_histogram(aes(x=up_n), fill="blue", binwidth = 1, colour="grey") 
b=  ggplot(dynamic) + geom_histogram( aes(x=down_n, y=-(..count..)), fill="red", binwidth = 1, colour="grey")  

cowplot::plot_grid(top,b, ncol=1,nrow=2)

Plot gene expression profiles; with respect to clusters

Step 1: make rpkms easier to do from the total than re-merge rpkms from previous analyses

rpkm = as.data.frame(rpkm(filt_series, normalized.lib.sizes = T))
rpkm$gene = rownames(rpkm)
head(rpkm)
##                 6.19190_S20   7.19191_S1  8.19192_S3 9.19193_S6 10.19194_S9
## ENSG00000000003    4.429771  8.156323470  4.38952419 10.6804534   6.3980668
## ENSG00000000005    0.000000  0.009483402  0.04928463  0.0597703   1.3456444
## ENSG00000000419   44.399215 35.471356378 41.58979026 43.5042255  52.6514494
## ENSG00000000457    1.238538  2.004196996  1.73642745  2.1702937   1.8297088
## ENSG00000000460    1.180203  0.616667448  0.79787079  0.6343311   0.5946063
## ENSG00000000938    0.037342  0.039555121  0.06281165  0.1200338   0.2892193
##                 11.19195_S12 12.19196_S15 13.19197_S18 14.19198_S21 15.19199_S2
## ENSG00000000003  6.801005854   5.34283930   8.97825018   6.20967066  10.8402147
## ENSG00000000005  0.000000000   0.00000000   0.01544879   0.03209898   2.4098978
## ENSG00000000419 45.991978584  39.33891270  49.48751378  50.78492239  42.9456144
## ENSG00000000457  1.295959535   1.57592908   1.85270325   1.76943419   2.0765218
## ENSG00000000460  2.466087279   0.52619404   0.65859945   0.48500820   0.6567233
## ENSG00000000938  0.004942582   0.02557923   0.03579814   0.20826453   0.2264708
##                  1.19200_S5  2.19201_S8 3.19202_S11 4.19203_S14 5.19204_S17
## ENSG00000000003  9.06770306  5.32941019  5.84949758   6.0114837   9.2254928
## ENSG00000000005  0.00000000  0.08398167  0.60139195   0.2409799   4.4756616
## ENSG00000000419 33.13432809 39.93582431 41.36207525  44.8663886  41.5605299
## ENSG00000000457  1.48149383  1.92021291  1.75979885   1.9199313   2.2089933
## ENSG00000000460  1.30065327  0.54949797  0.75463451   0.4644325   0.6545097
## ENSG00000000938  0.01705362  0.01946035  0.03693757   0.1435891   0.6182750
##                 20.S2.KD5NT_S16 21.S7.KD6NT_S19 22.S12.KD3NT_S22 1.KD4NT_S1
## ENSG00000000003      2.91981053      7.24007579       6.04191622   2.721130
## ENSG00000000005      0.01819565      0.03092287       0.00000000   0.000000
## ENSG00000000419     53.71154817     39.44174826      45.42236856  97.377053
## ENSG00000000457      1.02998553      1.53342908       1.72161788   1.638974
## ENSG00000000460      3.09922167      2.03999184       1.65032753   1.159884
## ENSG00000000938      0.00000000      0.00000000       0.00566926   0.000000
##                 10.P6D3_S10 11.P6D9_S11 12.KD3NT_S12 13.P8D0_S13 14.P8D1_S14
## ENSG00000000003  3.04182092   2.3619870     2.007633  2.37629595  2.47737266
## ENSG00000000005  0.03330655   0.3159819     0.000000  0.00000000  0.00000000
## ENSG00000000419 55.34687707  61.8625718    59.451797 42.01958949 48.28312360
## ENSG00000000457  2.02064095   1.9257976     1.452945  1.24041816  1.42477241
## ENSG00000000460  0.46371294   0.3505026     1.110480  0.27099166  0.32147046
## ENSG00000000938  0.10033191   0.3091501     0.000000  0.08574259  0.01490318
##                 15.P8D3_S15 16.P8D9_S16  2.KD5NT_S2   3.P5D0_S3   4.P5D1_S4
## ENSG00000000003  2.40701054   1.4325053  2.17149640  2.05304718  2.05323845
## ENSG00000000005  0.01942692   0.0000000  0.00000000  0.00000000  0.07376914
## ENSG00000000419 55.26450018  30.4398764 68.43831575 54.29605373 62.97810749
## ENSG00000000457  1.99078950   1.9511371  1.14632438  1.56093866  1.77664156
## ENSG00000000460  0.25160256   0.2157656  2.96071334  0.46373842  0.40604554
## ENSG00000000938  0.33312081   0.3322643  0.01986473  0.00937088  0.03418777
##                   5.P5D3_S5  6.P5D9_S6  7.KD6NT_S7   8.P6D0_S8   9.P6D1_S9
## ENSG00000000003  2.85095385  3.1988551  2.11611139  3.65396967  2.11199568
## ENSG00000000005  0.11659502  0.5437334  0.01486434  0.00000000  0.03562279
## ENSG00000000419 57.48804753 57.4549878 71.05122695 43.42671695 75.29026376
## ENSG00000000457  1.93550926  2.1379258  1.45307098  1.94983340  1.81719641
## ENSG00000000460  0.46402550  0.5134805  1.34958572  0.38134185  0.34121346
## ENSG00000000938  0.08105267  0.3779840  0.01377754  0.09247046  0.01650912
##                 13.21417_S53 14.21418_S2 15.21419_S9 16.21420_S16 17.21421_S24
## ENSG00000000003   14.2865244  19.9670327  12.9109419   13.8343917   16.5892979
## ENSG00000000005    0.6167761   0.7768957   0.3267391    3.0427612    2.8011821
## ENSG00000000419   36.1991881  28.8368126  25.0081755   46.1827349   43.6749895
## ENSG00000000457    1.9090162   1.3601721   1.4215021    1.9288800    2.0130346
## ENSG00000000460    0.8320844   0.9401136   0.6962417    0.6710995    0.6981899
## ENSG00000000938    3.1995683   4.6260542   3.4460243    3.6745701    4.3981194
##                 18.21422_S32 19.21423_S40 20.21424_S47 21.21425_S54 22.21426_S3
## ENSG00000000003   17.1528743    9.0381035    8.8393788    9.5489457   7.6205880
## ENSG00000000005    1.9451814    0.9721878    0.6045077    0.2642974   1.8124435
## ENSG00000000419   41.6803305   38.4294219   35.1662323   33.6787557  37.5163734
## ENSG00000000457    1.9813518    1.9312377    1.5891691    1.6755817   1.7366544
## ENSG00000000460    0.7506485    0.7282125    0.6000864    0.4106589   0.4360015
## ENSG00000000938    1.8029603    0.5715975    0.6481190    0.6652296   1.5888471
##                 23.21427_S10 24.21428_S17            gene
## ENSG00000000003     8.960145    8.4071871 ENSG00000000003
## ENSG00000000005     2.368496    1.6611412 ENSG00000000005
## ENSG00000000419    36.592366   34.4896434 ENSG00000000419
## ENSG00000000457     1.826420    1.3572695 ENSG00000000457
## ENSG00000000460     0.463495    0.4265672 ENSG00000000460
## ENSG00000000938     2.374268    1.7839139 ENSG00000000938

Step2 make long for plotting

21k * 2 donors * 7 conditions * 3 reps (+4 extra reps)

long = tidyr::pivot_longer(rpkm, 1:(ncol(rpkm)-1), names_to="sample_id", values_to = "rpkm")
head(long)
## # A tibble: 6 x 3
##   gene            sample_id     rpkm
##   <chr>           <chr>        <dbl>
## 1 ENSG00000000003 6.19190_S20   4.43
## 2 ENSG00000000003 7.19191_S1    8.16
## 3 ENSG00000000003 8.19192_S3    4.39
## 4 ENSG00000000003 9.19193_S6   10.7 
## 5 ENSG00000000003 10.19194_S9   6.40
## 6 ENSG00000000003 11.19195_S12  6.80
#add sample info
long = merge(long, filt_series$samples)
head(long)
##    sample_id            gene        rpkm         group lib.size norm.factors
## 1 1.19200_S5 ENSG00000167476  0.04557251 -2.D13-A.bulk 24704720     1.366482
## 2 1.19200_S5 ENSG00000209082 22.51281884 -2.D13-A.bulk 24704720     1.366482
## 3 1.19200_S5 ENSG00000136051 14.23897850 -2.D13-A.bulk 24704720     1.366482
## 4 1.19200_S5 ENSG00000203808  0.11244166 -2.D13-A.bulk 24704720     1.366482
## 5 1.19200_S5 ENSG00000203780  0.83159053 -2.D13-A.bulk 24704720     1.366482
## 6 1.19200_S5 ENSG00000221930  3.02058173 -2.D13-A.bulk 24704720     1.366482
##   time donor biorep exp construct separation time.donor
## 1   -2 D13-A      5   2    native       bulk   -2.D13-A
## 2   -2 D13-A      5   2    native       bulk   -2.D13-A
## 3   -2 D13-A      5   2    native       bulk   -2.D13-A
## 4   -2 D13-A      5   2    native       bulk   -2.D13-A
## 5   -2 D13-A      5   2    native       bulk   -2.D13-A
## 6   -2 D13-A      5   2    native       bulk   -2.D13-A
#add gene info
long = merge(long, filt_series$genes, by.x="gene", by.y="Geneid")
head(long)
##              gene    sample_id     rpkm         group lib.size norm.factors
## 1 ENSG00000000003   8.19192_S3 4.389524  1.D13-A.bulk 44801786    1.1251937
## 2 ENSG00000000003 24.21428_S17 8.407187 15.D13-A.bulk 51980503    1.0430259
## 3 ENSG00000000003  15.P8D3_S15 2.407011  3.D07-G.bulk 36812579    0.8685081
## 4 ENSG00000000003 12.KD3NT_S12 2.007633 -2.D07-G.bulk 52234909    1.1123288
## 5 ENSG00000000003    3.P5D0_S3 2.053047  0.D07-G.bulk 35877163    0.8561931
## 6 ENSG00000000003 20.21424_S47 8.839379 15.D07-G.bulk 68511442    1.0048101
##   time donor biorep    exp construct separation time.donor Length gene_name
## 1    1 D13-A      7      2    native       bulk    1.D13-A   4535    TSPAN6
## 2   15 D13-A     P7 native    native       bulk   15.D13-A   4535    TSPAN6
## 3    3 D07-G     P8      1    native       bulk    3.D07-G   4535    TSPAN6
## 4   -2 D07-G     P8      1    native       bulk   -2.D07-G   4535    TSPAN6
## 5    0 D07-G     P5      1    native       bulk    0.D07-G   4535    TSPAN6
## 6   15 D07-G   P8_2 native    native       bulk   15.D07-G   4535    TSPAN6
##      description
## 1 tetraspanin 6 
## 2 tetraspanin 6 
## 3 tetraspanin 6 
## 4 tetraspanin 6 
## 5 tetraspanin 6 
## 6 tetraspanin 6

Average per condition

21k * 2 donors * 7 conditions = ~294k

averages = group_by(long, group, gene, time, donor, gene_name, separation, description) %>% summarise(mean=mean(rpkm))
## `summarise()` has grouped output by 'group', 'gene', 'time', 'donor', 'gene_name', 'separation'. You can override using the `.groups` argument.
head(averages); dim(averages)
## # A tibble: 6 x 8
## # Groups:   group, gene, time, donor, gene_name, separation [6]
##   group         gene   time  donor gene_name separation description         mean
##   <fct>         <chr>  <chr> <chr> <chr>     <chr>      <chr>              <dbl>
## 1 -2.D07-G.bulk ENSG0~ -2    D07-G TSPAN6    bulk       "tetraspanin 6 " 3.60e+0
## 2 -2.D07-G.bulk ENSG0~ -2    D07-G TNMD      bulk       "tenomodulin "   9.14e-3
## 3 -2.D07-G.bulk ENSG0~ -2    D07-G DPM1      bulk       "dolichyl-phosp~ 6.21e+1
## 4 -2.D07-G.bulk ENSG0~ -2    D07-G SCYL3     bulk       "SCY1 like pseu~ 1.43e+0
## 5 -2.D07-G.bulk ENSG0~ -2    D07-G C1orf112  bulk       "chromosome 1 o~ 1.91e+0
## 6 -2.D07-G.bulk ENSG0~ -2    D07-G FGR       bulk       "FGR proto-onco~ 5.62e-3
## [1] 296436      8
#NB seems that scale in Rv4 returns a matrix, rather than a vector :P
averages = group_by(averages, gene) %>% mutate(z_score=scale(mean)[,1])
summary(averages[c("mean", "z_score")])
##       mean             z_score       
##  Min.   :    0.00   Min.   :-3.1098  
##  1st Qu.:    0.19   1st Qu.:-0.6763  
##  Median :    1.23   Median :-0.2368  
##  Mean   :   17.84   Mean   : 0.0000  
##  3rd Qu.:    5.25   3rd Qu.: 0.5459  
##  Max.   :44097.56   Max.   : 3.4738
head(averages); dim(averages)
## # A tibble: 6 x 9
## # Groups:   gene [6]
##   group  gene   time  donor gene_name separation description        mean z_score
##   <fct>  <chr>  <chr> <chr> <chr>     <chr>      <chr>             <dbl>   <dbl>
## 1 -2.D0~ ENSG0~ -2    D07-G TSPAN6    bulk       "tetraspanin 6~ 3.60e+0  -0.771
## 2 -2.D0~ ENSG0~ -2    D07-G TNMD      bulk       "tenomodulin "  9.14e-3  -0.654
## 3 -2.D0~ ENSG0~ -2    D07-G DPM1      bulk       "dolichyl-phos~ 6.21e+1   1.73 
## 4 -2.D0~ ENSG0~ -2    D07-G SCYL3     bulk       "SCY1 like pse~ 1.43e+0  -1.45 
## 5 -2.D0~ ENSG0~ -2    D07-G C1orf112  bulk       "chromosome 1 ~ 1.91e+0   2.50 
## 6 -2.D0~ ENSG0~ -2    D07-G FGR       bulk       "FGR proto-onc~ 5.62e-3  -0.604
## [1] 296436      9

Save average rpkms in wide format

averages$sample = gsub("-",".",paste("day", averages$time, averages$donor, averages$separation, sep="_"))
unique(averages$sample)
##  [1] "day_.2_D07.G_bulk"     "day_.2_D13.A_bulk"     "day_0_D07.G_bulk"     
##  [4] "day_0_D13.A_bulk"      "day_1_D07.G_bulk"      "day_1_D13.A_bulk"     
##  [7] "day_15_D07.G_bulk"     "day_15_D07.G_floating" "day_15_D13.A_bulk"    
## [10] "day_15_D13.A_floating" "day_3_D07.G_bulk"      "day_3_D13.A_bulk"     
## [13] "day_9_D07.G_bulk"      "day_9_D13.A_bulk"
wide = pivot_wider(averages[c("gene_name", "sample", "mean")], names_from ="sample", values_from = "mean", 
                   values_fn=mean) #using gene names has a few overlaps, if so just take the mean

head(wide); nrow(wide)
## # A tibble: 6 x 15
##   gene_name day_.2_D07.G_bulk day_.2_D13.A_bu~ day_0_D07.G_bulk day_0_D13.A_bulk
##   <chr>                 <dbl>            <dbl>            <dbl>            <dbl>
## 1 TSPAN6              3.60              6.77             2.69             6.28  
## 2 TNMD                0.00914           0                0                0.0312
## 3 DPM1               62.1              41.2             46.6             38.2   
## 4 SCYL3               1.43              1.34             1.58             1.83  
## 5 C1orf112            1.91              1.65             0.372            0.564 
## 6 FGR                 0.00562           0.0198           0.0625           0.0282
## # ... with 10 more variables: day_1_D07.G_bulk <dbl>, day_1_D13.A_bulk <dbl>,
## #   day_15_D07.G_bulk <dbl>, day_15_D07.G_floating <dbl>,
## #   day_15_D13.A_bulk <dbl>, day_15_D13.A_floating <dbl>,
## #   day_3_D07.G_bulk <dbl>, day_3_D13.A_bulk <dbl>, day_9_D07.G_bulk <dbl>,
## #   day_9_D13.A_bulk <dbl>
## [1] 21131
write.table(wide, file.path(ous_dir, "Feb22_RNAseq/03limma/rpkm_tmm_means.csv"), sep=",", row.names = F)

Fix the time factors, also adding a psuedotime where floating day15 samples are beyond bulk day15s

averages$time = factor(gsub("-",".",averages$time), levels=c(".2",0,1,3,9,15))
averages$pstime = as.character(averages$time)
averages$pstime[averages$separation == "floating"] = "15float"
averages$pstime = factor(averages$pstime, levels=c(".2",0,1,3,9,15, "15float"))
write.table(averages, file.path(ous_dir, "Feb22_RNAseq/03limma/plotting_mean_rpkm.tab"), sep="\t")
ggplot(averages[averages$gene_name == "PPARG",]) +
  geom_line(aes(x=pstime, y=mean, group= donor, colour=donor))

ggplot(averages[averages$gene_name == "HOTAIR",]) +
  geom_line(aes(x=pstime, y=mean, group= donor, colour=donor))

ggplot(averages[averages$gene_name == "FABP4",]) +
  geom_line(aes(x=pstime, y=mean, group= donor, colour=donor))

ggplot(averages[averages$gene_name == "CEBPA",]) +
  geom_line(aes(x=pstime, y=mean, group= donor, colour=donor))

ggplot(averages[averages$gene_name == "CIDEA",]) +
  geom_line(aes(x=pstime, y=mean, group= donor, colour=donor))

## Load clusters and plot with the extra time points

This will give some initial indication as to whether cluster identity can in any way predict day15 expression.

oriclust = read.delim(file.path(ous_dir, "D13rnaseqDec19/clust/match&merge_feb20/clusters_11-02-2020.tab"),
                      header=T)
head(oriclust)
##              gene gene_name
## 1 ENSG00000003249    DBNDD1
## 2 ENSG00000067715      SYT1
## 3 ENSG00000070808    CAMK2A
## 4 ENSG00000084628    NKAIN1
## 5 ENSG00000091137   SLC26A4
## 6 ENSG00000101638   ST8SIA5
##                                                     description
## 1                                dysbindin domain containing 1 
## 2                                              synaptotagmin 1 
## 3         calcium/calmodulin dependent protein kinase II alpha 
## 4           sodium/potassium transporting ATPase interacting 1 
## 5                            solute carrier family 26 member 4 
## 6 ST8 alpha-N-acetyl-neuraminide alpha-2,8-sialyltransferase 5 
##   merged.cluster.glut merged.cluster.abdo genes.abdo   Ave.Expr
## 1                   0                   1       1505 -2.4906993
## 2                   0                   1       1505 -3.0152061
## 3                   0                   1       1505 -2.8504277
## 4                   0                   1       1505 -1.7988766
## 5                   0                   1       1505 -0.7368722
## 6                   0                   1       1505 -2.4129353
##   total_time_score adj.P.Val.time total_donor_effect adj.P.Val.donor
## 1        -7.998479   3.452301e-06         2.20357137    5.538658e-01
## 2       -13.510137   1.175280e-09         9.10389182    4.147443e-05
## 3        -9.301606   4.022025e-08         2.93110477    7.654837e-03
## 4        -5.570890   1.927477e-07        -0.16189159    1.359138e-04
## 5        -2.443903   6.648553e-04        -0.03461714    1.564329e-01
## 6        -5.820780   1.439061e-07        14.30018755    3.549713e-09
##   cluster.glut cluster.abdo new.cluster.abdo
## 1            0            3                1
## 2            0            3                1
## 3            0            3                1
## 4            0            3                1
## 5            0            3                1
## 6            0            3                1
oriclust$merged.cluster.glut = factor(oriclust$merged.cluster.glut, levels = c(0:10))
oriclust$merged.cluster.abdo = factor(oriclust$merged.cluster.abdo, levels = c(0:16))
earclust = merge(averages, oriclust[c("gene","merged.cluster.glut","merged.cluster.abdo")])
earclust$gene.donor = paste(earclust$gene_name, sep=".", earclust$donor)
head(earclust)
##              gene         group time donor gene_name separation  description
## 1 ENSG00000000005  1.D13-A.bulk    1 D13-A      TNMD       bulk tenomodulin 
## 2 ENSG00000000005  0.D13-A.bulk    0 D13-A      TNMD       bulk tenomodulin 
## 3 ENSG00000000005 -2.D07-G.bulk   .2 D07-G      TNMD       bulk tenomodulin 
## 4 ENSG00000000005 15.D13-A.bulk   15 D13-A      TNMD       bulk tenomodulin 
## 5 ENSG00000000005  3.D07-G.bulk    3 D07-G      TNMD       bulk tenomodulin 
## 6 ENSG00000000005  9.D07-G.bulk    9 D07-G      TNMD       bulk tenomodulin 
##          mean    z_score            sample pstime merged.cluster.glut
## 1 0.222041790 -0.4398654  day_1_D13.A_bulk      1                   8
## 2 0.031155024 -0.6319792  day_0_D13.A_bulk      0                   8
## 3 0.009140408 -0.6541353 day_.2_D07.G_bulk     .2                   8
## 4 1.947360246  1.2965432 day_15_D13.A_bulk     15                   8
## 5 0.056442830 -0.6065288  day_3_D07.G_bulk      3                   8
## 6 0.286571779 -0.3749206  day_9_D07.G_bulk      9                   8
##   merged.cluster.abdo gene.donor
## 1                   8 TNMD.D13-A
## 2                   8 TNMD.D13-A
## 3                   8 TNMD.D07-G
## 4                   8 TNMD.D13-A
## 5                   8 TNMD.D07-G
## 6                   8 TNMD.D07-G

In glut:

ggplot(earclust[earclust$donor == "D07-G",], aes(x=pstime, y=z_score)) +
  geom_line(aes(group=gene.donor, colour=time %in% c(9, 15, "15float")),
            alpha=0.2) +
  geom_smooth(method=loess, colour="black", se=F, aes(group=merged.cluster.glut)) +
  facet_wrap(~merged.cluster.glut)
## `geom_smooth()` using formula 'y ~ x'

ggplot(earclust[earclust$donor == "D07-G",], aes(x=pstime, y=z_score)) +
  geom_boxplot(aes(group=pstime, fill=time == 15)) +
  geom_smooth(method=loess, colour="black", se=F, aes(group=merged.cluster.glut)) +
  facet_wrap(~merged.cluster.glut)
## `geom_smooth()` using formula 'y ~ x'

Donor 7 analysis: For cluster 8 (adipogenesis) genes, most drop in expression from day9 to bulk day15. However, in floating cells cluster 8 genes remain highly expressed on average. There is a large variation in the expression of cluster8 genes in floating samples; indicating the

ggplot(earclust[earclust$donor == "D13-A",], aes(x=pstime, y=z_score)) +
  geom_line(aes(group=gene.donor, colour=time %in% c(9, 15, "15float")),
            alpha=0.2) +
  geom_smooth(method=loess, colour="black", se=F, aes(group=merged.cluster.abdo)) +
  facet_wrap(~merged.cluster.abdo)
## `geom_smooth()` using formula 'y ~ x'

ggplot(earclust[earclust$donor == "D13-A",], aes(x=pstime, y=z_score)) +
  geom_boxplot(aes(group=pstime, fill=time == 15)) +
  geom_smooth(method=loess, colour="black", se=F, aes(group=merged.cluster.abdo)) +
  facet_wrap(~merged.cluster.abdo)
## `geom_smooth()` using formula 'y ~ x'

Abdo custer 8 has the same trend, but it is less pronounced, as bulk and floating day15 expression patterns are more similar.

Overall I would say that the floating cells tend to continue the trend of the clusters, whilst bulk samples are more often have different gene expression between day9 and day15.

ggplot(earclust[earclust$merged.cluster.glut == 8,], aes(x=pstime, y=z_score)) +
  geom_line(aes(group=gene.donor, colour=time %in% c(9, 15, "15float")),
            alpha=0.05) +
  geom_smooth(method=loess, colour="black", se=F, aes(group=merged.cluster.glut)) +
  scale_color_discrete(name="new data") +facet_wrap(~donor)
## `geom_smooth()` using formula 'y ~ x'

ggplot(earclust[earclust$merged.cluster.glut == 1,], aes(x=pstime, y=z_score)) +
  geom_line(aes(group=gene.donor, colour=time %in% c(9, 15, "15float")),
            alpha=0.05) +
  geom_smooth(method=loess, colour="black", se=F, aes(group=merged.cluster.glut)) +
  scale_color_discrete(name="new data") +facet_wrap(~donor)
## `geom_smooth()` using formula 'y ~ x'

Clearly significant genes in cluster 8 tend to decrease expr d9->bulk d15. And have a tendency to continue decreasing in floating…. though thats’ only bae on he loess curve, the underlying data is very dispersed.

ggplot(earclust[earclust$merged.cluster.glut == 8 & earclust$donor == "D07-G",], 
       aes(x=pstime, y=z_score)) +
  geom_line(aes(group=gene.donor, colour=time %in% c(9, 15, "15float")),
            alpha=0.05) +
  geom_smooth(method=loess, colour="black", se=F, aes(group=merged.cluster.glut)) +
  scale_color_discrete(name="new data") +facet_wrap(~gene %in% rownames(result)[!result[,"d9_to_d15.D7.bulk"] == 0 ])
## `geom_smooth()` using formula 'y ~ x'

ggplot(earclust[earclust$merged.cluster.abdo == 8 & earclust$donor == "D13-A",], 
       aes(x=pstime, y=z_score)) +
  geom_line(aes(group=gene.donor, colour=time %in% c(9, 15, "15float")),
            alpha=0.05) +
  geom_smooth(method=loess, colour="black", se=F, aes(group=merged.cluster.abdo)) +
  scale_color_discrete(name="new data") + facet_wrap(~gene %in% rownames(result)[!result[,"d9_to_d15.D13.bulk"] == 0 ])
## `geom_smooth()` using formula 'y ~ x'

ggplot(earclust[earclust$merged.cluster.glut == 1 & earclust$donor == "D07-G",], 
       aes(x=pstime, y=z_score)) +
  geom_line(aes(group=gene.donor, colour=time %in% c(9, 15, "15float")),
            alpha=0.1) +
  geom_smooth(method=loess, colour="black", se=F, aes(group=merged.cluster.glut)) +
  scale_color_discrete(name="new data") +facet_wrap(~gene %in% rownames(result)[!result[,"d9_to_d15.D7.bulk"] == 0 ])
## `geom_smooth()` using formula 'y ~ x'

ggplot(earclust[earclust$merged.cluster.glut == 1 & earclust$donor == "D07-G",], 
       aes(x=pstime, y=z_score)) +
  geom_line(aes(group=gene.donor, colour=time %in% c(9, 15, "15float")),
            alpha=0.1) +
  geom_smooth(method=loess, colour="black", se=F, aes(group=merged.cluster.glut)) +
  scale_color_discrete(name="new data") +facet_wrap(~gene %in% rownames(result)[!result[,"d9_to_d15.D7.floating"] == 0 ])
## `geom_smooth()` using formula 'y ~ x'

ggplot(earclust[earclust$merged.cluster.abdo == 1 & earclust$donor == "D13-A",], 
       aes(x=pstime, y=z_score)) +
  geom_line(aes(group=gene.donor, colour=time %in% c(9, 15, "15float")),
            alpha=0.1) +
  geom_smooth(method=loess, colour="black", se=F, aes(group=merged.cluster.abdo)) +
  scale_color_discrete(name="new data") + facet_wrap(~gene %in% rownames(result)[!result[,"d9_to_d15.D13.bulk"] == 0 ])
## `geom_smooth()` using formula 'y ~ x'

The final state of cluster1 genes is less predictable from their dynamics prior to diff. onset. There is a big donor difference: in donor7 cluster one genes seems o be evenly split between being up/down regulated in both fractions or in just one. Whilst in D13 the trends are more mild.

Classifying d15 info

If we can make a column containg the d15 state information: e.g. float_up, D7.float_up, D13.float_up, bulk_up, ect. then we can facet based on that column to get a more granular cluster view.

#head(result)

granule =result[,grep("d9_to_d15.*",colnames(result))]
granule[granule == 1] = "up"
granule[granule == -1] = "down"

granule = data.frame(granule)

for (i in 1:nrow(granule)){
  if (granule$d9_to_d15.D7.floating[i] == granule$d9_to_d15.D13.floating[i] & 
      granule$d9_to_d15.D7.bulk[i] == granule$d9_to_d15.D13.bulk[i] &
      granule$d9_to_d15.D7.floating[i] == granule$d9_to_d15.D13.bulk[i]){
     granule$d15_status[i] = paste("day15", sep="_", granule$d9_to_d15.D7.floating[i])
    #sig in bulk and floating
  } else if (granule$d9_to_d15.D7.floating[i] == granule$d9_to_d15.D13.floating[i]){
    granule$d15_status[i] = paste("float", sep="_", granule$d9_to_d15.D7.floating[i])
    #sig in floating
  } else if (granule$d9_to_d15.D7.bulk[i] == granule$d9_to_d15.D13.bulk[i]){
    #sig in bulk
    granule$d15_status[i] = paste("bulk", sep="_", granule$d9_to_d15.D7.bulk[i])
  } else if (granule$d9_to_d15.D7.floating[i] == granule$d9_to_d15.D7.bulk[i]){
    #sig in donor7
     granule$d15_status[i] = paste("d7", sep="_", granule$d9_to_d15.D7.bulk[i])
  } else if (granule$d9_to_d15.D13.floating[i] == granule$d9_to_d15.D13.bulk[i]){
    #sig in donor13
     granule$d15_status[i] = paste("d13", sep="_", granule$d9_to_d15.D13.bulk[i])
  } else {
    interesting = colnames(granule)
    granule$d15_status[i] = paste(colnames(granule)[1:ncol(granule)-1][granule[i,1:(ncol(granule)-1)] != 0], collapse="_&_")
  }
}
summary(as.factor(granule$d15_status))
##                                     bulk_0 
##                                       3147 
##                                  bulk_down 
##                                        310 
##                                    bulk_up 
##                                        578 
##                                      d13_0 
##                                         60 
##                                   d13_down 
##                                          4 
##                                     d13_up 
##                                          3 
##                                       d7_0 
##                                        396 
##                                    d7_down 
##                                        394 
##                                      d7_up 
##                                        586 
## d9_to_d15.D7.bulk_&_d9_to_d15.D13.floating 
##                                         87 
## d9_to_d15.D7.floating_&_d9_to_d15.D13.bulk 
##                                        325 
##                                    day15_0 
##                                       8168 
##                                 day15_down 
##                                       1726 
##                                   day15_up 
##                                        886 
##                                    float_0 
##                                       2515 
##                                 float_down 
##                                        714 
##                                   float_up 
##                                       1275

merging into plottng dataframe

granule$gene = rownames(granule)
earclust = merge(earclust, granule[c('gene','d15_status')])
head(earclust)
##              gene         group time donor gene_name separation  description
## 1 ENSG00000000005  1.D13-A.bulk    1 D13-A      TNMD       bulk tenomodulin 
## 2 ENSG00000000005  0.D13-A.bulk    0 D13-A      TNMD       bulk tenomodulin 
## 3 ENSG00000000005 -2.D07-G.bulk   .2 D07-G      TNMD       bulk tenomodulin 
## 4 ENSG00000000005 15.D13-A.bulk   15 D13-A      TNMD       bulk tenomodulin 
## 5 ENSG00000000005  3.D07-G.bulk    3 D07-G      TNMD       bulk tenomodulin 
## 6 ENSG00000000005  9.D07-G.bulk    9 D07-G      TNMD       bulk tenomodulin 
##          mean    z_score            sample pstime merged.cluster.glut
## 1 0.222041790 -0.4398654  day_1_D13.A_bulk      1                   8
## 2 0.031155024 -0.6319792  day_0_D13.A_bulk      0                   8
## 3 0.009140408 -0.6541353 day_.2_D07.G_bulk     .2                   8
## 4 1.947360246  1.2965432 day_15_D13.A_bulk     15                   8
## 5 0.056442830 -0.6065288  day_3_D07.G_bulk      3                   8
## 6 0.286571779 -0.3749206  day_9_D07.G_bulk      9                   8
##   merged.cluster.abdo gene.donor d15_status
## 1                   8 TNMD.D13-A    day15_0
## 2                   8 TNMD.D13-A    day15_0
## 3                   8 TNMD.D07-G    day15_0
## 4                   8 TNMD.D13-A    day15_0
## 5                   8 TNMD.D07-G    day15_0
## 6                   8 TNMD.D07-G    day15_0
earclust$d15_status = factor(earclust$d15_status, 
                             levels = c(
                               paste("day15",sep="_",c("up","down",0)),
                               paste("float", sep="_", c("up","down",0)),
                               paste("bulk", sep="_", c("up","down",0)),
                               paste("d7", sep="_", c("up","down",0)),
                               paste("d13", sep="_", c("up","down",0)),
                               "d9_to_d15.D7.bulk_&_d9_to_d15.D13.floating",
                               "d9_to_d15.D7.floating_&_d9_to_d15.D13.bulk"
                             ))
summary(earclust$d15_status)
##                                   day15_up 
##                                       7042 
##                                 day15_down 
##                                       8694 
##                                    day15_0 
##                                      34916 
##                                   float_up 
##                                       7518 
##                                 float_down 
##                                       3990 
##                                    float_0 
##                                      19880 
##                                    bulk_up 
##                                       5782 
##                                  bulk_down 
##                                       2016 
##                                     bulk_0 
##                                      16506 
##                                      d7_up 
##                                       2814 
##                                    d7_down 
##                                       1890 
##                                       d7_0 
##                                       2198 
##                                     d13_up 
##                                         28 
##                                   d13_down 
##                                         28 
##                                      d13_0 
##                                        714 
## d9_to_d15.D7.bulk_&_d9_to_d15.D13.floating 
##                                        798 
## d9_to_d15.D7.floating_&_d9_to_d15.D13.bulk 
##                                       1372
earclust[is.na(earclust$d15_status),]
##  [1] gene                group               time               
##  [4] donor               gene_name           separation         
##  [7] description         mean                z_score            
## [10] sample              pstime              merged.cluster.glut
## [13] merged.cluster.abdo gene.donor          d15_status         
## <0 rows> (or 0-length row.names)

Plot cluster 8 split by d15 status

Excepting genes that don’t change in day9 to day15;

The largest groups of genes are those downregulated in both bulk and floating cells; and those up in floating cells. There’s also some genes downregulated in bulk only.

ggplot(earclust[earclust$merged.cluster.glut == 8 & earclust$donor == "D07-G",], 
       aes(x=pstime, y=z_score)) +
  geom_line(aes(group=gene.donor, colour=time %in% c(9, 15)),
            alpha=0.1) +
  geom_smooth(method=loess, colour="black", se=F, aes(group=merged.cluster.glut)) +
  scale_color_discrete(name="new data") +facet_wrap(~d15_status) 
## `geom_smooth()` using formula 'y ~ x'

ggplot(earclust[earclust$merged.cluster.abdo == 8 & earclust$donor == "D13-A",], 
       aes(x=pstime, y=z_score)) +
  geom_line(aes(group=gene.donor, colour=time %in% c(9, 15)),
            alpha=0.1) +
  geom_smooth(method=loess, colour="black", se=F, aes(group=merged.cluster.abdo)) +
  scale_color_discrete(name="new data") +facet_wrap(~d15_status) 
## `geom_smooth()` using formula 'y ~ x'

ggplot(earclust[earclust$merged.cluster.abdo == 8 & earclust$donor == "D13-A" &
                  grepl("^(day15|float|bulk)",earclust$d15_status),], 
       aes(x=pstime, y=z_score)) +
  geom_line(aes(group=gene.donor, colour=time %in% c(9, 15)),
            alpha=0.1) +
  geom_smooth(method=loess, colour="black", se=F, aes(group=merged.cluster.abdo)) +
  scale_color_discrete(name="new data") +facet_wrap(~d15_status) 
## `geom_smooth()` using formula 'y ~ x'

ggplot(earclust[earclust$merged.cluster.glut == 8 & earclust$donor == "D07-G" &
                  grepl("^(day15|float|bulk)",earclust$d15_status),], 
       aes(x=pstime, y=z_score)) +
  geom_line(aes(group=gene.donor, colour=time %in% c(9, 15)),
            alpha=0.1) +
  geom_smooth(method=loess, colour="black", se=F, aes(group=merged.cluster.glut)) +
  scale_color_discrete(name="new data") +facet_wrap(~d15_status)  + theme_bw(base_size=20)
## `geom_smooth()` using formula 'y ~ x'

In cluster 1 the largest groups are upregulated

ggplot(earclust[earclust$merged.cluster.glut == 1 & earclust$donor == "D07-G",], 
       aes(x=pstime, y=z_score)) +
  geom_line(aes(group=gene.donor, colour=time %in% c(9, 15)),
            alpha=0.4) +
  geom_smooth(method=loess, colour="black", se=F, aes(group=merged.cluster.glut)) +
  scale_color_discrete(name="new data") +facet_wrap(~d15_status) 
## `geom_smooth()` using formula 'y ~ x'

ggplot(earclust[earclust$merged.cluster.abdo == 1 & earclust$donor == "D13-A",], 
       aes(x=pstime, y=z_score)) +
  geom_line(aes(group=gene.donor, colour=time %in% c(9, 15)),
            alpha=0.4) +
  geom_smooth(method=loess, colour="black", se=F, aes(group=merged.cluster.abdo)) +
  scale_color_discrete(name="new data") +facet_wrap(~d15_status) 
## `geom_smooth()` using formula 'y ~ x'

ggplot(earclust[earclust$merged.cluster.glut == 1 & earclust$donor == "D07-G" &
                  grepl("^(day15|float|bulk)",earclust$d15_status),], 
       aes(x=pstime, y=z_score)) +
  geom_line(aes(group=gene.donor, colour=time %in% c(9, 15)),
            alpha=0.1) +
  geom_smooth(method=loess, colour="black", se=F, aes(group=merged.cluster.glut)) +
  scale_color_discrete(name="new data") +facet_wrap(~d15_status)  + theme_bw(base_size=20)
## `geom_smooth()` using formula 'y ~ x'

day9->day15 changes in other clusters

Excepting the largest clusters 1 and 8, below I look at the distribution of genes DE in adipocytes among RNAseq clusters from early differentiation.

Much like cluster1 genes, cluster 2 and 3 genes are well represented in genes that are upregulated at day15. Whilst genes that are downregulated at day15 are more likely to have been in cluster 6 or 7. Genes upregulated in floating cells are a mix of early differentiation clusters. Genes downregulated in floating cells likewise largely belong to clusters 6 and 7. Genes upregulated in bulk adipocytes likewise belong largely to clusters 2 and 3.

This makes some sense. Genes that were already decreasing expression by day9, tend to continue that trend by day 15. Genes that decreased expression early in differentiation are equally likely to regain expression in adipocytes, particularly in the bulk fraction where a more heterogenous population of cells exists. Gene expression levels needed to drive adipogenesis are different tha those require to maintain mature adipocyte state.

ggplot(earclust[!earclust$merged.cluster.abdo %in% c(1,8) & earclust$donor == "D13-A",], 
       aes(x=pstime, y=z_score)) +
  geom_line(aes(group=gene.donor, colour=merged.cluster.abdo),
            alpha=0.2) +
  geom_smooth(method=loess, colour="black", se=F, aes(group=d15_status)) +
  scale_color_discrete(name="new data") +facet_wrap(~d15_status)  + 
  guides(colour = guide_legend(override.aes = list(alpha = 1))) + theme_classic(base_size=20)
## `geom_smooth()` using formula 'y ~ x'

ggplot(earclust[!earclust$merged.cluster.abdo %in% c(1,8) & earclust$donor == "D13-A" &
                  earclust$d15_status %in% c(paste("day15",sep="_",c("up","down")),
                               paste("float", sep="_", c("up","down")),
                               paste("bulk", sep="_", c("up","down"))),], 
       aes(x=pstime, y=z_score)) +
  geom_line(aes(group=gene.donor, colour=merged.cluster.abdo),
            alpha=0.4) +
  geom_smooth(method=loess, colour="black", se=F, aes(group=d15_status)) +
  scale_color_discrete(name="new data") +facet_wrap(~d15_status)  + 
  guides(colour = guide_legend(override.aes = list(alpha = 1))) + theme_classic(base_size = 20)
## `geom_smooth()` using formula 'y ~ x'

ggplot(earclust[!earclust$merged.cluster.glut %in% c(1,8) & earclust$donor == "D07-G" &
                  earclust$d15_status %in% c(paste("day15",sep="_",c("up","down")),
                               paste("float", sep="_", c("up","down")),
                               paste("bulk", sep="_", c("up","down"))),], 
       aes(x=pstime, y=z_score)) +
  geom_line(aes(group=gene.donor, colour=merged.cluster.glut),
            alpha=0.4) +
  geom_smooth(method=loess, colour="black", se=F, aes(group=d15_status)) +
  scale_color_discrete(name="new data") +facet_wrap(~d15_status)  + 
  guides(colour = guide_legend(override.aes = list(alpha = 1))) + theme_classic(base_size = 20)
## `geom_smooth()` using formula 'y ~ x'

##saveearclust here

##Other than reclustering, we can visualise those DE day9-> day15

#clusters made on previous sig genes (minus the donor specific genes)
early_sig = unique(earclust$gene)
length(early_sig) #8299
## [1] 8299
#sig genes with extra times
all_sig = unique(comb$Geneid[comb$adj.P.Val < 0.01])
length(all_sig) #11639
## [1] 11639
summary(early_sig %in% all_sig) #2529 genes are dropped when day15 included 
##    Mode   FALSE    TRUE 
## logical    2529    5770
summary(all_sig %in% early_sig) #5869 new genes are different n the day15 samples
##    Mode   FALSE    TRUE 
## logical    5869    5770

how many day15 sig genes are donor-specific? (as in not expressed in the other donor). Just 1! that’s incredible

dropouts = group_by(averages, donor, gene) %>% filter(mean == 0) %>% count()
summary(dropouts$n >6)
##    Mode   FALSE    TRUE 
## logical    3153       3
summary(dropouts$gene %in% all_sig & dropouts$n > 6)
##    Mode   FALSE    TRUE 
## logical    3155       1
summary(dropouts$gene %in% early_sig & dropouts$n > 6)
##    Mode   FALSE 
## logical    3156
bulk = topTable(pair_fit, coef = c("d9_to_d15.D7.bulk", "d9_to_d15.D13.bulk"), number = nrow(pair_fit$genes), p.value = 0.01)
summary(bulk$d9_to_d15.D7.bulk * bulk$d9_to_d15.D13.bulk < 0)
##    Mode   FALSE    TRUE 
## logical    8413     472
bulk = bulk[bulk$d9_to_d15.D7.bulk * bulk$d9_to_d15.D13.bulk > 0,] #sig genes must have the same sign
head(bulk); nrow(bulk) #8k genes diff between day9 and 15 :o
##                          Geneid Length  gene_name
## ENSG00000214199 ENSG00000214199   1346  EEF1A1P12
## ENSG00000236972 ENSG00000236972    411    FABP5P1
## ENSG00000249855 ENSG00000249855   1382  EEF1A1P19
## ENSG00000259308 ENSG00000259308    404 AC024270.1
## ENSG00000230383 ENSG00000230383    862 AC009245.1
## ENSG00000236044 ENSG00000236044    408    FABP5P2
##                                                                            description
## ENSG00000214199      eukaryotic translation elongation factor 1 alpha 1 pseudogene 12 
## ENSG00000236972                             fatty acid binding protein 5 pseudogene 1 
## ENSG00000249855      eukaryotic translation elongation factor 1 alpha 1 pseudogene 19 
## ENSG00000259308 fatty acid binding protein 5 (psoriasis-associated) (FABP5) pseudogene
## ENSG00000230383                                 ribosomal protein L6 (RPL6) pseudogene
## ENSG00000236044                             fatty acid binding protein 5 pseudogene 2 
##                 d9_to_d15.D7.bulk d9_to_d15.D13.bulk    AveExpr        F
## ENSG00000214199         -7.303966          -7.095949  5.1116184 394.7706
## ENSG00000236972         -5.867698          -5.688479  1.6957432 340.9487
## ENSG00000249855         -4.249684          -4.265037  6.9259314 302.0047
## ENSG00000259308         -8.622021          -7.743733 -2.6290392 299.3378
## ENSG00000230383         -4.584224          -5.761866  5.0965401 289.0667
## ENSG00000236044         -6.926677          -5.769900  0.8331876 279.7052
##                      P.Value    adj.P.Val
## ENSG00000214199 2.771171e-26 5.867678e-22
## ENSG00000236972 3.504469e-25 3.710181e-21
## ENSG00000249855 3.570235e-24 1.944460e-20
## ENSG00000259308 3.673297e-24 1.944460e-20
## ENSG00000230383 7.845518e-24 3.322420e-20
## ENSG00000236044 1.314769e-23 4.639821e-20
## [1] 8413